Load the data set Mode from the package mlogit. This data set contains choices about mode of transportation. To access it, you first need to load the package; the data set is then loaded as follows:

# Load the Mode data set directly from mlogit so the call does not depend on
# the package already being attached to the search path.
data("Mode", package = "mlogit")

Once you have loaded the data set, answer the following questions: Describe this data set. How many variables are there and of which type (i.e., categorical/quantitative)?

# Compact overview of Mode: row and column counts, then each column's type
# and its first few values.
glimpse(Mode)
## Rows: 453
## Columns: 9
## $ choice       <fct> car, rail, car, car, car, car, car, car, bus, car, rail, …
## $ cost.car     <dbl> 1.5070097, 6.0569985, 5.7946769, 1.8691439, 2.4989523, 4.…
## $ cost.carpool <dbl> 2.3356118, 2.8969191, 2.1374543, 2.5724266, 1.7220099, 0.…
## $ cost.bus     <dbl> 1.800512, 2.237128, 2.576385, 1.903518, 2.686000, 1.84765…
## $ cost.rail    <dbl> 2.358920, 1.855450, 2.747479, 2.268276, 2.973866, 2.31005…
## $ time.car     <dbl> 18.503200, 31.311107, 22.547429, 26.090282, 4.699140, 3.0…
## $ time.carpool <dbl> 26.338233, 34.256956, 23.255171, 29.896023, 12.414084, 9.…
## $ time.bus     <dbl> 20.86779, 67.18189, 63.30906, 19.75270, 43.09204, 12.8256…
## $ time.rail    <dbl> 30.03347, 60.29313, 49.17164, 13.47268, 39.74325, 43.5442…

How many different modes of transportation are in this data set? What is the most popular mode? What is the least popular mode?

# Count the observations at each level of the choice factor.
table(Mode$choice)
## 
##     car carpool     bus    rail 
##     218      32      81     122
# summary() of a factor column also reports the per-level counts.
summary(Mode$choice)
##     car carpool     bus    rail 
##     218      32      81     122
# Bar chart of how often each mode of transportation was chosen.
ggplot(data = Mode, mapping = aes(x = choice)) +
  geom_bar(fill = "#6DB562", color = "black") +
  theme_bw()

# Per-column summary: level counts for choice, and min/quartiles/mean/max
# for every cost and time column.
summary(Mode)
##      choice       cost.car       cost.carpool       cost.bus    
##  car    :218   Min.   :0.4099   Min.   :0.1293   Min.   :1.013  
##  carpool: 32   1st Qu.:3.6964   1st Qu.:0.9519   1st Qu.:1.783  
##  bus    : 81   Median :4.8796   Median :1.6665   Median :2.027  
##  rail   :122   Mean   :4.8735   Mean   :1.6863   Mean   :2.036  
##                3rd Qu.:6.2255   3rd Qu.:2.4581   3rd Qu.:2.321  
##                Max.   :8.8555   Max.   :3.2953   Max.   :2.740  
##    cost.rail        time.car       time.carpool       time.bus     
##  Min.   :1.272   Min.   : 2.404   Min.   : 8.385   Min.   : 1.969  
##  1st Qu.:1.947   1st Qu.:21.835   1st Qu.:28.391   1st Qu.:25.457  
##  Median :2.198   Median :37.497   Median :40.637   Median :41.415  
##  Mean   :2.212   Mean   :37.044   Mean   :39.771   Mean   :39.923  
##  3rd Qu.:2.476   3rd Qu.:53.104   3rd Qu.:51.843   3rd Qu.:52.805  
##  Max.   :3.113   Max.   :66.871   Max.   :65.009   Max.   :75.681  
##    time.rail     
##  Min.   : 4.621  
##  1st Qu.:28.143  
##  Median :40.034  
##  Mean   :39.505  
##  3rd Qu.:49.172  
##  Max.   :73.998

In general, what is the most expensive mode? The least expensive?

# Overlaid density curves of travel cost, one layer per mode. Each layer maps
# a constant label to `fill`, which creates that mode's legend entry.
graf_costo <- ggplot(Mode) +
  geom_density(aes(cost.car, fill = "Costo de auto"), alpha = 0.6) +
  geom_density(aes(cost.carpool, fill = "Costo de carpools"), alpha = 0.6) +
  geom_density(aes(cost.bus, fill = "Costo de autobús"), alpha = 0.6) +
  geom_density(aes(cost.rail, fill = "Costo de ferrocarril"), alpha = 0.6) +
  # Name every colour so the label-to-colour pairing is explicit. Unnamed
  # values are paired with the labels in sorted order, not in layer order;
  # the names below keep the pairing that the sorted order produced.
  scale_fill_manual(
    name = "Variable",
    values = c(
      "Costo de auto" = "#0078BD",
      "Costo de autobús" = "#D61F33",
      "Costo de carpools" = "#F6D276",
      "Costo de ferrocarril" = "#00B686"
    )
  ) +
  # All four layers share the x axis, so give it a generic title instead of
  # letting it inherit "cost.car" from the first layer.
  labs(x = "Costo") +
  theme_bw()

# Convert the static ggplot into an interactive plotly widget.
ggplotly(graf_costo)

Create a plot showing the univariate distributions of time by car and time by bus. Discuss.

# Overlaid density curves of travel time, one layer per mode. Each layer maps
# a constant label to `fill`, which creates that mode's legend entry.
graf_tiempo <- ggplot(Mode) +
  geom_density(aes(time.car, fill = "Tiempo en auto"), alpha = 0.6) +
  geom_density(aes(time.carpool, fill = "Tiempo en carpools"), alpha = 0.6) +
  geom_density(aes(time.bus, fill = "Tiempo en autobús"), alpha = 0.6) +
  geom_density(aes(time.rail, fill = "Tiempo en ferrocarril"), alpha = 0.6) +
  # Name every colour so the label-to-colour pairing is explicit. Unnamed
  # values are paired with the labels in sorted order, not in layer order;
  # the names below keep the pairing that the sorted order produced.
  scale_fill_manual(
    name = "Variable",
    values = c(
      "Tiempo en auto" = "#0078BD",
      "Tiempo en autobús" = "#D61F33",
      "Tiempo en carpools" = "#F6D276",
      "Tiempo en ferrocarril" = "#00B686"
    )
  ) +
  # All four layers share the x axis, so give it a generic title instead of
  # letting it inherit "time.car" from the first layer.
  labs(x = "Tiempo") +
  theme_bw()

# Convert the static ggplot into an interactive plotly widget.
ggplotly(graf_tiempo)

How do choices relate to cost by the different modes?

# Names of all cost and time columns: every column except the outcome. The
# outcome is excluded by name rather than by position, so this still holds
# if the column order changes.
var_num <- setdiff(names(Mode), "choice")

# One box plot per cost/time variable, split by the chosen mode.
for (k in var_num) {
  graf <- ggplot(Mode) +
    # .data[[k]] looks the column up by its string name inside the plot data,
    # replacing the fragile eval(as.name(k)) construction.
    geom_boxplot(
      aes(choice, .data[[k]], fill = choice),
      show.legend = FALSE
    ) +
    ylab(k) +
    theme_bw()

  # Plots are not auto-printed inside a for loop, so print explicitly.
  print(graf)
}